from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import pdb
from sklearn.preprocessing import MaxAbsScaler # 小数标准化
from sklearn.preprocessing import MinMaxScaler # 离差标准化
from sklearn.preprocessing import StandardScaler  #标准差标准化


# Load the customer table.
# NOTE(review): 'ANSI' is an alias of the 'mbcs' codec, which Python only
# provides on Windows — confirm the platform / the file's real encoding.
data = pd.read_csv("../ML_data/company.csv", encoding='ANSI')

# 1. Feature selection: not every column helps to separate the groups, so
#    only the two columns below are used for clustering.
# 2. Standardization (zero mean, unit variance per column). fit_transform()
#    is fit() followed by transform() on the same data in a single call.
std = StandardScaler()
train_X = std.fit_transform(data[["平均每次消费金额", "平均消费周期（天）"]])
print(type(train_X))  # <class 'numpy.ndarray'>

# 3. Build and fit the model. n_clusters is the number of groups (the
#    library default is 8); random_state seeds the initialisation.
#    Clustering is unsupervised, so only the features are passed in.
km = KMeans(n_clusters=3, random_state=1).fit(train_X)

# Cluster index assigned to every sample.
y_pred = km.predict(train_X)
print("对应的组号", y_pred)

# Coordinates of the cluster centres (in standardized units).
center = km.cluster_centers_
print("聚类中心", center)


import matplotlib.pyplot as plt
def show_result(train_X, y_pred, center):
    """Scatter-plot the clustered samples and mark every cluster centre.

    Args:
        train_X: 2-D array of samples; column 0 is drawn on the x axis and
            column 1 on the y axis.
        y_pred: One cluster label per row of ``train_X``; used as the colour
            value of each point.
        center: 2-D array of cluster centres, one row per cluster, with the
            same column layout as ``train_X``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.scatter(
        train_X[:, 0],
        train_X[:, 1],
        c=y_pred,
    )

    # Draw the cluster centres as large stars. One colour value per centre is
    # generated so the function works for any number of clusters (a fixed
    # three-element list fails when len(center) != 3). Colour values are
    # normalised to their own min/max before the colormap lookup, so 0..k-1
    # gives the same colours as the former [3, 4, 5] when k == 3.
    plt.scatter(
        center[:, 0],
        center[:, 1],
        marker='*',
        s=200,
        c=list(range(len(center))),
    )
    plt.show()


show_result(train_X, y_pred, center)

# Classify a new customer: spends 200 per visit, comes back every 20 days.
# The model was fitted on standardized features, so the raw sample has to go
# through the same fitted scaler first; feeding [[200, 20]] directly would
# compare raw units against standardized cluster centres. The sample is
# wrapped in a DataFrame carrying the column names the scaler was fitted on
# (same order), which keeps the input consistent with the training data.
new_customer = pd.DataFrame(
    [[200, 20]],
    columns=["平均每次消费金额", "平均消费周期（天）"],
)
print("每次消费200,20天来一次 对应的类别", km.predict(std.transform(new_customer)))


# Cluster centres noted by the author (presumably from an earlier run), in
# standardized units, columns: spend amount, spend cycle — with the customer
# segment assigned to each centre:
#   [-0.31303177 -0.34597888]  -> VIP
#   [ 1.97828686  0.12096992]  -> SVIP
#   [-0.30666025  2.75936563]  -> regular customer